List of used packages:
library(limma)
library(ggplot2)
library(sva)
library(heatmaply)
library(EnhancedVolcano)
library(dplyr)
library(clusterProfiler)
library(enrichplot)
library(DOSE)
library(gprofiler2)
We have chosen ComBat method of batch correction provided by sva package. Two proteins whose expression differed depending on the year of experiment were dropped.
dropped_genes <- rownames(dif_data_sva)
new_data_combat <- combat_edata[!(rownames(combat_edata) %in% dropped_genes), ]
This study aimed to deаine molecular mechanisms of cells osteogenic differentiation. So that we performed differential expression analysis of cells in undifferentiated and in osteogenic differentiation stages.
# Dif expression
X_diff <- model.matrix(~ Differentiation, data = data_factors)
# Build a linear model for each protein
fit_y <- lmFit(new_data_combat, design = X_diff, method = "robust", maxit = 1000)
# Empirical Bayes statistics
efit_y <- eBayes(fit_y)
topTable(efit_y, coef = 2)
## logFC AveExpr t P.Value adj.P.Val B
## NNMT 2.3113210 27.18386 11.867629 1.421392e-13 1.443872e-10 20.91736
## SERC -2.7488389 24.98259 -11.642234 2.396469e-13 1.443872e-10 20.40569
## P4HA1 -1.4818745 26.89469 -10.106495 9.981186e-12 4.009110e-09 16.75432
## LIMA1 -1.2346790 26.26172 -9.693961 2.862783e-11 8.624133e-09 15.72056
## MAP1B -1.1453005 28.17139 -9.600442 3.646452e-11 8.755890e-09 15.47983
## COX2 1.1924636 24.76338 9.498806 4.749380e-11 8.755890e-09 15.22074
## ECHA 0.8429808 27.50285 9.472523 5.086409e-11 8.755890e-09 15.15061
## TENS1 1.9414853 23.70594 9.350404 7.002698e-11 9.849825e-09 14.84393
## SYYC -1.1933111 25.39629 -9.331633 7.356716e-11 9.849825e-09 14.78666
## P4HA2 -1.9295327 25.43287 -9.202902 1.033016e-10 1.244784e-08 14.45967
num_spots <- nrow(data_norm_quantile_max)
full_list_diff <- topTable(efit_y, coef = 2, number = num_spots,
sort.by = "none")
Draw heatmap:
p_above_y <- full_list_diff$adj.P.Val < 0.05
dif_data_diff <- data_norm_quantile_max[p_above_y, ]
sum(p_above_y)
## [1] 406
heatmaply(dif_data_diff, main = "Differentiation vs Control", fontsize_row = 1, dendrogram = "col", scale_fill_gradient_fun = ggplot2::scale_fill_gradient2(low = "lightseagreen", high = "orangered3", midpoint = 15))
Draw Volcano plot:
EnhancedVolcano(full_list_diff,
lab = rownames(full_list_diff),
x = 'logFC',
y = 'adj.P.Val',
title = "Differentially expressed genes\n condition = 'Differentiation'",
subtitle = NULL,
pCutoff = 0.05,
FCcutoff = 0.1,
legend = c("Not significant","Log2FC","Padj","Padj & Log2FC"),
legendPosition = "right",
col=c("lightcyan4","#f57002", "#ee9f02", "#0a9278"))
In order to group significantly differentially expressed proteins we have used GO enrichment analysis.
# keep only the significant proteins results
sig <- subset(full_list_diff, adj.P.Val < 0.05)
# get the significant up-regulated proteins
up <- subset(full_list_diff, logFC > 0)
# get the significant down-regulated proteins
down <- subset(full_list_diff, logFC < 0)
# needed to convert to enrichResult object
up_names <- gconvert(row.names(up))
down_names <- gconvert(row.names(down))
# enrichment analysis using proteins names
multi_gp_up_reg <- gost(list("up-regulated" = up_names$name), multi_query = FALSE, evcodes =TRUE)
# modify the g:Profiler data frame
gp_mod_up = multi_gp_up_reg$result[, c("query", "source", "term_id","term_name", "p_value", "query_size", "intersection_size", "term_size", "effective_domain_size", "intersection")]
gp_mod_up <- gp_mod_up[order(gp_mod_up$p_value), ]
gp_mod_up_BP <- gp_mod_up[gp_mod_up$source == "GO:BP", ]
gp_mod_up_BP$GeneRatio <- paste0(gp_mod_up_BP$intersection_size, "/", gp_mod_up_BP$query_size)
gp_mod_up_BP$BgRatio <- paste0(gp_mod_up_BP$term_size, "/", gp_mod_up_BP$effective_domain_size)
names(gp_mod_up_BP) <- c("Cluster", "Category", "ID", "Description", "p.adjust", "query_size", "Count", "term_size", "effective_domain_size", "geneID", "GeneRatio", "BgRatio")
gp_mod_up_BP$geneID <- gsub(",", "/", gp_mod_up_BP$geneID)
row.names(gp_mod_up_BP) <- gp_mod_up_BP$ID
gp_mod_enrich_up_BP <- new("enrichResult", result = gp_mod_up_BP)
Draw enrichment plot:
enrichplot::dotplot(gp_mod_enrich_up_BP, showCategory = 10) + ggplot2::labs(title = "up-regulated") + ggplot2::scale_color_gradient(low = "lightseagreen", high = "darkorange1")
# enrichment analysis using gene names
multi_gp_down_reg <- gost(list("down-regulated" = down_names$name), multi_query = FALSE, evcodes =TRUE)
# modify the g:Profiler data frame
gp_mod_down = multi_gp_down_reg$result[, c("query", "source", "term_id","term_name", "p_value", "query_size", "intersection_size", "term_size", "effective_domain_size", "intersection")]
gp_mod_down <- gp_mod_down[order(gp_mod_down$p_value), ]
# BP
gp_mod_down_BP <- gp_mod_down[gp_mod_down$source == "GO:BP", ]
gp_mod_down_BP$GeneRatio <- paste0(gp_mod_down_BP$intersection_size, "/", gp_mod_down_BP$query_size)
gp_mod_down_BP$BgRatio <- paste0(gp_mod_down_BP$term_size, "/", gp_mod_down_BP$effective_domain_size)
names(gp_mod_down_BP) <- c("Cluster", "Category", "ID", "Description", "p.adjust", "query_size", "Count", "term_size", "effective_domain_size", "geneID", "GeneRatio", "BgRatio")
gp_mod_down_BP$geneID <- gsub(",", "/", gp_mod_down_BP$geneID)
gp_mod_enrich_down_BP <- new("enrichResult", result = gp_mod_down_BP)
Draw enrichment plot:
enrichplot::dotplot(gp_mod_enrich_down_BP, showCategory = 10) + ggplot2::labs(title = "down-regulated") + ggplot2::scale_color_gradient(low = "lightseagreen", high = "darkorange1")